# 1、创建环境
from pyspark.context import SparkContext

# Start a SparkContext with master "local" and application name "demo2_map".
sc = SparkContext("local", "demo2_map")

# 2. Read the data
students_rdd = sc.textFile("../../data/students.txt")

# Key every line by its last comma-separated field and keep the full line as
# the value, producing (key, line) pairs for the custom partitioner.
kv_rdd = students_rdd.map(lambda row: (row.rsplit(",", 1)[-1], row))

# Lookup table from a single Chinese numeral character to a 0-based
# partition index (seven entries -> indices 0..6).
num_dict = {
    "一": 0,
    "二": 1,
    "三": 2,
    "四": 3,
    "五": 4,
    "六": 5,
    "七": 6,
}


# Custom partition function
def partition_by_fun(clazz: str) -> int:
    """Return the partition index for an RDD key.

    The third character of ``clazz`` (``clazz[2:3]``) is looked up in
    ``num_dict``.
    NOTE(review): presumably keys look like "文科一班", where the third
    character is the class numeral -- confirm against the input data.

    Args:
        clazz: The key of a (key, value) pair, i.e. the last comma-separated
            field of an input line.

    Returns:
        The index stored in ``num_dict`` for the third character, or ``0``
        when the key has fewer than three characters or that character is
        not in ``num_dict``.
    """
    # Slicing (rather than indexing) yields "" for short keys instead of
    # raising IndexError.
    num = clazz[2:3]
    # Always return an int: RDD.partitionBy needs an integer partition index,
    # and a bare .get(num) would return None for unmapped keys and make the
    # job fail with a TypeError. Unmapped keys fall back to partition 0.
    return num_dict.get(num, 0)


# Custom partitioning: redistribute the (key, line) pairs into 7 partitions,
# with partition_by_fun choosing the partition for each key.
partition_by_rdd = kv_rdd.partitionBy(7, partition_by_fun)

# Save the partitioned RDD as text under the output path.
partition_by_rdd.saveAsTextFile("../../data/partitionBy")
